Final Project - John Williams
Load libraries
library(tidyverse)
library(raster) #raster()
library(sf) #st_read()
library(ggspatial) #annotation_scale,annotation_north_arrow
library(ggnewscale) #new_scale_color()
#library(ggsn) #scalebar()
library(plotly) #plot_ly()
library(gridExtra) #grid.arrange()
Set working directory
setwd("~/GitHub/DSCI605/Mod16")
Read the files
##################Read csv and shape file into R
# learners will have this data loaded from an earlier episode, st_read needs sf packages
Unemployrate <-read_csv("data/unemployment_county.csv")
## Rows: 37674 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): County, State
## dbl (2): Unemployment Rate, Year
## num (3): Labor Force, Employed, Unemployed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Crimerate<-read_csv("data/crime_and_incarceration_by_state.csv")
## Rows: 816 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): jurisdiction
## dbl (13): year, prisoner_count, state_population, violent_crime_total, murde...
## lgl (3): includes_jails, crime_reporting_change, crimes_estimated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
States<-st_read("data/tl_2019_us_state/tl_2019_us_state.shp")
## Reading layer `tl_2019_us_state' from data source
## `C:\Users\jowilli1\Documents\GitHub\DSCI605\Mod16\data\tl_2019_us_state\tl_2019_us_state.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
Process US data
######## Focus on Contiguous USA
Contiguous_state <- States%>% filter(STUSPS!="AK"&
STUSPS!="AS"&
STUSPS!="MP"&
STUSPS!="PR"&
STUSPS!="VI"&
STUSPS!="HI"&
STUSPS!="GU")
###Check the length of states.
length(unique(Contiguous_state$STUSPS))
## [1] 49
Process Unemployment data
##############Unemployrate
##Check the length of State
unique(Unemployrate $State)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA" "HI" "ID" "IL" "IN" "IA"
## [16] "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH" "NJ"
## [31] "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VT"
## [46] "VA" "WA" "WV" "WI" "WY"
length(unique(Unemployrate $State))
## [1] 50
Unemployrate <- Unemployrate %>% filter(State!="AK"& State!="HI") %>%
group_by(State,Year) %>%
summarise(Totalforce=sum(`Labor Force`),
Totalemployed=sum(Employed),
Totalunemployed=sum(Unemployed),
Meanrate=mean(`Unemployment Rate`,rm.na=TRUE)
#Meanrate = (Totalunemployed/Totalforce)*100
)
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
##Check the length of State again
length(unique(Unemployrate $State))
## [1] 48
##Change the column name “State” into "STUSPS" by using rename().Use filter() to pick some years data from 2007 to 2014.
Unemployrate <- Unemployrate %>%
rename("STUSPS"="State") %>%
filter(Year %in% c(2007:2014))
Process crime data
##############Crimerate
##Check the length of states
unique(Crimerate$jurisdiction)
## [1] "FEDERAL" "ALABAMA" "ALASKA" "ARIZONA"
## [5] "ARKANSAS" "CALIFORNIA" "COLORADO" "CONNECTICUT"
## [9] "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII"
## [13] "IDAHO" "ILLINOIS" "INDIANA" "IOWA"
## [17] "KANSAS" "KENTUCKY" "LOUISIANA" "MAINE"
## [21] "MARYLAND" "MASSACHUSETTS" "MICHIGAN" "MINNESOTA"
## [25] "MISSISSIPPI" "MISSOURI" "MONTANA" "NEBRASKA"
## [29] "NEVADA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO"
## [33] "NEW YORK" "NORTH CAROLINA" "NORTH DAKOTA" "OHIO"
## [37] "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND"
## [41] "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS"
## [45] "UTAH" "VERMONT" "VIRGINIA" "WASHINGTON"
## [49] "WEST VIRGINIA" "WISCONSIN" "WYOMING"
length(unique(Crimerate$jurisdiction))
## [1] 51
head(Crimerate)
## # A tibble: 6 × 17
## jurisdiction includes_jails year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 FEDERAL FALSE 2001 149852 NA
## 2 ALABAMA FALSE 2001 24741 FALSE
## 3 ALASKA TRUE 2001 4570 FALSE
## 4 ARIZONA FALSE 2001 27710 FALSE
## 5 ARKANSAS FALSE 2001 11489 FALSE
## 6 CALIFORNIA FALSE 2001 157142 FALSE
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>
###Change the column name
Crimerate <- Crimerate %>%
rename("STUSPS"="jurisdiction") %>%
rename("Year"="year") %>%
filter(STUSPS!="FEDERAL"& STUSPS!="ALASKA"& STUSPS!="HAWAII") %>%
filter(Year %in% c(2007:2014))
##Recheck the data
length(unique(Crimerate$STUSPS))
## [1] 48
head(Crimerate)
## # A tibble: 6 × 17
## STUSPS includes_jails Year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 ALABAMA FALSE 2007 25253 FALSE
## 2 ARIZONA FALSE 2007 37700 FALSE
## 3 ARKANSAS FALSE 2007 13275 FALSE
## 4 CALIFORNIA FALSE 2007 171444 FALSE
## 5 COLORADO FALSE 2007 22666 FALSE
## 6 CONNECTICUT TRUE 2007 19438 FALSE
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>
###Changes the state names in the state column "STUSPS"
Crimerate$STUSPS <- state.abb[match(str_to_title(Crimerate$STUSPS),state.name)]
###Calculate the crimerate
Crimerate <- Crimerate %>%
mutate(Crimerate=(violent_crime_total/state_population)*100) %>%
dplyr::mutate_if(is.numeric, round, 1)
Save the data frames
save(list = c("Crimerate", "Unemployrate","Contiguous_state"), file = "CleaneData.Rdata")
Read the data
##################Read data file into R
load(file = "CleaneData.Rdata")
Join Tables
########Join the rational tables and check the missing values
CS_Erate<-right_join(Contiguous_state, Unemployrate,
by = c("STUSPS"))
CS_Erate_Crate <- right_join(CS_Erate, Crimerate,
by = c("STUSPS","Year"))
CS_Erate_Crate1 <- CS_Erate_Crate %>%
dplyr::select(REGION,STUSPS,NAME,Year,Meanrate,Crimerate) %>%
dplyr::rename("Unemplyrate"="Meanrate")
#############Check the missing values
which(is.na(CS_Erate_Crate1$REGION))
## integer(0)
Create Plots
Prep for plots
## Perform the filter for the Year, preserving geometry only from CS_Erate_Crate1
CS_Erate_Crate2 <- CS_Erate_Crate1 %>%
filter(Year == 2014)
Plot 1
# Plot 1 The unemployment spatial map in 2014 over contiguous USA
CS_Erate_Crate2 %>% # Provide data to ggplot
ggplot() + # Start the graph
geom_sf(aes(fill = Unemplyrate), color = "white", size = 0.3) + # Plot the US map
scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Year2014-Unemployment Rate",
limits = c(3, 10)) + # Set up the legend
#scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
labs(title = "Unemployment Rate Map Over Contiguous US") + # Plot title
#scale bar
annotation_scale(location = "bl", width_hint=0.5) + #Alternative scale bar for distance
#scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
#dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
## Add North Arrow
annotation_north_arrow(location="br", which_north="true",
style = north_arrow_fancy_orienteering) +
theme(legend.position="right", # Put legend on the right
plot.title = element_text(hjust = 0.5, # Tweak plot title
color = "Gray40",
size = 16,
face = "bold")) +
xlab("Longitude") + ylab("Latitude") # Label x and y axis

Plot 2
#Plot 2 The crimerate spatial map in 2014 over contiguous USA
CS_Erate_Crate2 %>% # Provide data to ggplot
ggplot() + # Start the graph
geom_sf(aes(fill = Crimerate), color = "white", size = 0.3) + # Plot the US map
scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Year2014-Crime Rate",
limits = c(.1, .6)) + # Set up the legend
#scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
labs(title = "Crime Rate Map Over Contiguous US") + # Set up the title
#scale bar
annotation_scale(location = "bl", width_hint=0.5) + #Alternative scale bar for distance
#scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
#dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
## Add North Arrow
annotation_north_arrow(location="br", which_north="true",
style = north_arrow_fancy_orienteering) +
theme(legend.position="right", #change 'top' to specific position by c(2.5,0.8) Position legend to the right
plot.title = element_text(hjust = 0.5, # Tweak plot title
color = "Gray40",
size = 16,
face = "bold")) +
xlab("Longitude") + ylab("Latitude") # Label x and y axis

Plot 3
# Plot 3 Region 1 Unemployment
Region1 <- CS_Erate_Crate2 %>%
filter(REGION == 1, Year == 2014)
Region1 %>%
ggplot() +
geom_sf(aes(fill = Unemplyrate), color = "white", size = 0.3) +
scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Region 1 - Unemployment Rate") +
#scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
labs(title = "Region 1 Unemployment Rate Map") +
#scale bar
annotation_scale(location = "bl", width_hint=0.5) +
#scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
#dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
## Add North Arrow
annotation_north_arrow(location="br", which_north="true",
# pad_x = unit(2.5, "in"), pad_y = unit(0.2, "in"),
style = north_arrow_fancy_orienteering) +
theme(legend.position="right", #change 'top' to specific position by c(2.5,0.8)
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color="blue"),
plot.caption = element_text(color="Gray60")) +
xlab("Longitude") + ylab("Latitude")

Plot 4
#Plot 4 Crime Rate Region 2
Region2 <- CS_Erate_Crate2 %>%
filter(REGION == 2, Year == 2014)
Region2 %>%
ggplot() +
geom_sf(aes(fill = Crimerate), color = "white", size = 0.3) +
scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Region 2 - Crime Rate") +
#scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
labs(title = "Region 2 Crime Rate Map") +
#scale bar
annotation_scale(location = "bl", width_hint=0.5) +
#scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
#dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
## Add North Arrow
annotation_north_arrow(location="br", which_north="true",
# pad_x = unit(2.5, "in"), pad_y = unit(0.2, "in"),
style = north_arrow_fancy_orienteering) +
theme(legend.position="right", #change 'top' to specific position by c(2.5,0.8)
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color="blue"),
plot.caption = element_text(color="Gray60")) +
xlab("Longitude") + ylab("Latitude")

Plot 5
## Plot 5 Scatter Plot with colors by Region
# Create a plot with colors by Region
CS_Erate_Crate2$geometry <- NULL #Drop geometry column for plotly
plot_ly(
data = CS_Erate_Crate2, # Use this data file
x = ~Crimerate, # Use Crimerate for the x axis
y = ~Unemplyrate, # Use Unemplyrate for the y axis
type = 'scatter', # Create an interactive scatter plot
mode = 'markers', # Create the points at the intersection of the two
color = ~as.factor(REGION) # Color the points by the region they are in
) %>%
layout(
title = "Unemployment Rate and Crime Rate in 2014", # Title
xaxis = list(title = "Crime Rate per 100 people"), # x-axis label
yaxis = list(title = "UnemploymentRates per 100 people") # y-axis label
)
Plot 6
# Plot 6 The time series visualization of Unemployment Rate for some states
#Select states
states_to_plot <- c("California", "Idaho", "Illinois", "Indiana")
CS_Erate_Crate1$geometry <- NULL #Drop geometry column for plotly
# Filter data for the specified states and years
ts_data <- CS_Erate_Crate1 %>%
filter(Year >= 2007, Year <= 2014, NAME %in% states_to_plot)
# Create the plotly Unemployment time series plot
plot_ly(data = ts_data, # Use ts_data for plot
x = ~Year, # Use Year for time series x axis variable
y = ~Unemplyrate, # Use Unemployrate for y axis
color = ~NAME, # Color points by state name
type = 'scatter', # Set type to scatter for lines and markers
mode = 'lines+markers', # Use lines and markers
text = ~paste("Year:", Year, "<br>Unemployment Rate:", Unemplyrate),
hoverinfo = 'text' # Show tooltip text
) %>%
layout(
title = "Unemployment Rate Changes Along with Years", # Title
xaxis = list(title = "Year", tickvals = 2007:2014), # x-axis label and ticks
yaxis = list(title = "Unemployment Rate", range = c(3, 14)), # y-axis label and range
legend = list(title = list(text = "")) # Remove legend title
)
Plot 7
# Plot 7 The time series visualization of Crimerate for some states
# Define states to plot
states_to_plot <- c("California", "Idaho", "Florida", "Indiana")
# Filter data for the specified states and years
ts1_data <- CS_Erate_Crate1 %>%
filter(Year >= 2007, Year <= 2014, NAME %in% states_to_plot)
# Create the plotly time series plot
plot_ly(data = ts1_data, # use ts1_data for plot
x = ~Year, # Use Year for time series x axis variable
y = ~Crimerate, # Use Crimerate variable for y axis
color = ~NAME, # Color points by state name
type = 'scatter', # Set type to scatter for lines and markers
mode = 'lines+markers', # Use lines and markers
text = ~paste("Year:", Year, "<br>Crime Rate:", Crimerate),
hoverinfo = 'text' # Show tooltip text
) %>%
layout(
title = "Crime Rate Changes Along with Years", # Title
xaxis = list(title = "Year", tickvals = 2007:2014), # x-axis label and ticks
yaxis = list(title = "Crime Rate", range = c(.1, .8)), # y-axis label and range
legend = list(title = list(text = "")) # Remove legend title
)